import json

import pandas as pd

# Location of the raw mashup dump (JSON text) and of the generated report.
INPUT_PATH = r"D:\Chrome_download\Correted-ProgrammableWeb-dataset-main\data\raw\api_mashup\active_mashups_data.txt"
OUTPUT_PATH = "relate_tags.csv"

# Key under which the per-category number of related-API entries is stored.
# It lives in the same dict as the tag counters, so it also shows up as an
# item in the ranked tag list written to the CSV.
TOTAL_KEY = "ALL_SUM"


def count_related_tags(mashups):
    """Count, per category, the tags carried by the related APIs.

    Args:
        mashups: Iterable of mashup records (dicts) or empty/``None``
            entries. Each record is read through its ``"categories"`` list
            and its ``"related_apis"`` list, whose non-empty items are dicts
            with a ``"tags"`` list.

    Returns:
        A dict mapping each non-empty category to a dict of counters:
        ``"ALL_SUM"`` holds the number of related-API entries seen for that
        category and every other key is a tag mapped to its number of
        occurrences. Insertion order follows first appearance in the input.
    """
    relate_tags = {}
    for api in mashups:
        if not api:
            continue
        # Missing or null lists are treated as empty, in line with the
        # null checks already applied to individual entries.
        related_apis = api.get("related_apis") or ()
        for categorie in api.get("categories") or ():
            if not categorie:
                continue
            # Register the category even when it has no related APIs.
            counts = relate_tags.setdefault(categorie, {TOTAL_KEY: 0})
            for related_api in related_apis:
                # Every entry is counted, including empty/null ones.
                counts[TOTAL_KEY] += 1
                if not related_api:
                    continue
                for tag in related_api.get("tags") or ():
                    # The first occurrence counts as 1 (it used to be
                    # recorded as 0, leaving every tag count one short).
                    counts[tag] = counts.get(tag, 0) + 1
    return relate_tags


def rank_related_tags(relate_tags):
    """Order categories and their counters from most to least frequent.

    Args:
        relate_tags: Mapping produced by ``count_related_tags``.

    Returns:
        A list of ``(category, counters)`` pairs sorted by descending
        ``"ALL_SUM"``, where ``counters`` is a list of ``(key, count)`` pairs
        sorted by descending count. Ties keep their original order because
        the sort is stable.
    """
    by_total = sorted(
        relate_tags.items(),
        key=lambda item: item[1][TOTAL_KEY],
        reverse=True,
    )
    return [
        (
            category,
            sorted(counts.items(), key=lambda item: item[1], reverse=True),
        )
        for category, counts in by_total
    ]


def main():
    """Read the mashup dump, rank tags per category and write the CSV."""
    # The context manager closes the file; JSON text is read as UTF-8
    # instead of whatever the platform's locale encoding happens to be.
    with open(INPUT_PATH, encoding="utf-8") as txtfile:
        jsondata = json.load(txtfile)

    rows = rank_related_tags(count_related_tags(jsondata))

    column = ["category", "tag"]
    df = pd.DataFrame(rows, columns=column)
    df.to_csv(OUTPUT_PATH)


if __name__ == "__main__":
    main()





